# Start from a clean slate: remove every object that ls() reports in the
# current environment (the global environment when this script is run at
# top level). Attached packages and options are not affected.
rm(list=ls())

library(openxlsx)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(readr)
library(tokenizers)
library(dplyr)
library(countrycode)
library(survival)
library(zoo)
library(ggthemes)
library(tidyr)
library(sp)
library(plyr)
library(estprod)
library(stringr)


# Translate country identifiers from one coding scheme to another.
#
# vec:      vector of identifiers expressed in `scheme_1`
# scheme_1: name of the `countryref` column holding the codes used in `vec`
# scheme_2: name of the `countryref` column holding the replacement codes
#
# Returns `vec` with each value that appears in the `scheme_1` column
# replaced by the `scheme_2` value from the same `countryref` row. Values
# without a match are handled by dplyr::recode().
#
# Depends on a `countryref` lookup table with a `type` column being
# available in the session; only rows with type == 'country' are used.
recode_countries <- function(vec, scheme_1, scheme_2){
  country_rows <- countryref[which(countryref$type == 'country'), , drop = FALSE]

  # Read both columns row by row so that every old code stays paired with
  # the new code from the same row. De-duplicating each column on its own
  # can leave the two vectors with different lengths or shifted relative to
  # each other.
  old_codes <- as.character(country_rows[[scheme_1]])
  new_codes <- as.character(country_rows[[scheme_2]])

  # Drop rows without an old code and keep the first mapping for each old
  # code, so every name in the lookup is unique.
  keep <- !is.na(old_codes) & !duplicated(old_codes)
  recode_vals <- new_codes[keep]
  names(recode_vals) <- old_codes[keep]

  # recode() takes old = new pairs: the names are matched against `vec`
  # and the values are what gets substituted in.
  dplyr::recode(vec, !!!recode_vals)
}

# Load the two corpora: the speeches scored by the keyword-relevance
# classifier, and the full set of plenary speeches.
borders_unga_speeches <- read.csv('kw_relevance_predicted_set.csv', stringsAsFactors = FALSE)
full_unga_speeches <- read.csv('UNGAplenary_tiles_v5.csv', stringsAsFactors = FALSE)

# Keep only the speeches predicted as relevant (Predicted == 1) and drop
# every column whose name starts with 'x' (starts_with() ignores case by
# default, so names starting with 'X' are dropped as well).
borders_unga_speeches <- borders_unga_speeches %>%
  select(-starts_with('x')) %>%
  filter(Predicted == 1)

# Number of words in each speech. Computed once per corpus; the script
# previously ran these two assignments twice with identical results.
full_unga_speeches$n_words <- count_words(full_unga_speeches$text)
borders_unga_speeches$n_words <- count_words(borders_unga_speeches$text)

######################
# Emotion extraction #
######################
library(tidytext)
library(readtext)
library(dplyr)
library(lsa)
library(spacyr)
library(stringr)

# Anxiety seed lexicon; the rest of the script reads its `lemma` and
# `anxiety` columns.
rheault_anxiety <- read_csv('anxiety-lexicon.csv')

# Institutional terms blanked out of the speech text before tokenising.
# A single pattern is shared by both corpora so they are cleaned in exactly
# the same way; \\s+ also catches names split by several spaces or a line
# break, which a literal single space would miss.
boilerplate_pattern <- 'United\\s+Nations|Council|General\\s+Assembly|Organization'

# Turn a speech table into one row per word.
#
# speeches: data frame with columns country, year, text and doc_id
# Returns a data frame with country, year, doc_id and a `word` column
# holding the individual tokens produced by unnest_tokens().
tokenize_speeches <- function(speeches) {
  speeches %>%
    select(country, year, text, doc_id) %>%
    dplyr::mutate(text = str_replace_all(text, boilerplate_pattern, ' ')) %>%
    unnest_tokens(word, text)
}

unnested <- tokenize_speeches(borders_unga_speeches)

all_unnested <- tokenize_speeches(full_unga_speeches)

# reimplementing rheault's anxiety extraction approach
# Summed cosine similarity between one vector and every row of the global
# `anchor_high` table.
#
# x: numeric vector compared against columns X2:X301 of each anchor row
# Returns a single unnamed number: the sum of cosine(x, anchor row) over
# all rows of `anchor_high`.
cosine_high <- function(x){
  # Evaluate the argument before the nested row-wise mutate starts.
  # NOTE(review): presumably needed because callers pass c_across(...),
  # which has to be resolved against the caller's row, not an anchor row.
  force(x)

  per_anchor <- dplyr::mutate(rowwise(anchor_high),
                              high_sim = cosine(x, c_across(X2:X301)))
  similarities <- ungroup(select(per_anchor, high_sim))
  total <- dplyr::summarise(similarities, sum(high_sim))

  # Reduce the 1x1 summary table to a bare number.
  unname(unlist(total))
}

# Summed cosine similarity between one vector and every row of the global
# `anchor_low` table.
#
# x: numeric vector compared against columns X2:X301 of each anchor row
# Returns a single unnamed number: the sum of cosine(x, anchor row) over
# all rows of `anchor_low`.
cosine_low <- function(x){
  # Evaluate the argument before the nested row-wise mutate starts.
  # NOTE(review): presumably needed because callers pass c_across(...),
  # which has to be resolved against the caller's row, not an anchor row.
  force(x)

  per_anchor <- dplyr::mutate(rowwise(anchor_low),
                              low_sim = cosine(x, c_across(X2:X301)))
  similarities <- ungroup(select(per_anchor, low_sim))
  total <- dplyr::summarise(similarities, sum(low_sim))

  # Reduce the 1x1 summary table to a bare number.
  unname(unlist(total))
}

# Pre-trained GloVe word vectors. With col_names = FALSE the columns come
# back as X1, X2, ...; the first one holds the token and is renamed so it
# can be joined against the lexicon, which leaves X2:X301 as the vector
# components used below. quote = "" switches off quote handling so tokens
# containing quote characters are read literally.
glove <- read_delim("glove.6B.300d.txt", 
                    delim = " ", col_names = FALSE, quote="")
names(glove)[1] <- 'lemma'

# Seed words for the two poles of the anxiety scale, with their vectors
# attached. inner_join() drops seed words that have no GloVe vector: with
# left_join() such a word becomes an all-NA row, and that single NA turns
# every summed similarity in cosine_high()/cosine_low() into NA.
anchor_high <- rheault_anxiety %>% filter(anxiety == 1) %>%
  inner_join(glove, by='lemma')

anchor_low <- rheault_anxiety %>% filter(anxiety == -1) %>%
  inner_join(glove, by='lemma')

# Every word occurring in either corpus.
tokens <- data.frame(lemma = c(unnested$word, all_unnested$word)) %>%
  group_by(lemma) #%>% filter(n() > 5)
corpus_lemmas <- unique(tokens$lemma)
length(corpus_lemmas)

# Score each corpus word that has a GloVe vector: summed similarity to the
# high-anxiety seeds minus summed similarity to the low-anxiety seeds.
# The row-wise step below can take hours to run.
anxiety_scores <- glove %>% filter(lemma %in% corpus_lemmas) %>% rowwise() %>% 
  dplyr::mutate(high = cosine_high(c_across(X2:X301)),
                low = cosine_low(c_across(X2:X301))) %>%
  # Leave row-wise mode before rescaling: min() and max() must range over
  # the whole vocabulary. Row by row they would both equal the row's own
  # score and the rescale would be 0/0. The explicit dplyr:: prefix keeps
  # this independent of whether plyr's mutate() masks dplyr's.
  ungroup() %>%
  select(lemma, high, low) %>% 
  dplyr::mutate(score = (high - low),
                # linear rescale of score onto [-1, 1]
                scaled_score = 2*(score - min(score))/(max(score) - min(score)) - 1)

# Average scaled anxiety score of the scored words, per year.
#
# token_df: one-row-per-word table with `word` and `year` columns
# scores:   table with `lemma` and `scaled_score` columns
# Returns one row per year with a `mean_anxiety` column. Words without a
# score are dropped by the inner join and do not enter the mean.
mean_anxiety_by_year <- function(token_df, scores) {
  token_df %>%
    inner_join(scores, by = c('word' = 'lemma')) %>%
    group_by(year) %>%
    dplyr::summarise(mean_anxiety = mean(scaled_score))
}

anxiety_counts <- mean_anxiety_by_year(unnested, anxiety_scores)

all_anxiety_counts <- mean_anxiety_by_year(all_unnested, anxiety_scores)

# Stack the two yearly series, labelled by corpus, for plotting.
combined_anxiety <- rbind(anxiety_counts %>% dplyr::mutate(type = 'borders'),
                          all_anxiety_counts %>% dplyr::mutate(type = 'all')) 


# Figure 7
# Smoothed yearly mean anxiety, one line per corpus. The colour scale is
# given unnamed values and labels, so they are assigned in the order of the
# `type` values ('all', then 'borders').
ggplot(combined_anxiety, aes(x = year, y = mean_anxiety, color = type)) +
  geom_smooth(fill = 'grey85', alpha = 1) +
  labs(
    x = NULL,
    y = 'Anxiety Prevalence',
    title = 'Anxiety, all documents vs. border-relevant (higher = more anxious)'
  ) +
  scale_color_manual(
    name = NULL,
    values = c('blue', 'red'),
    labels = c('All Documents', 'Borders Only')
  ) +
  scale_x_continuous(breaks = c(1970, 1990, 2010)) +
  theme(
    # Bare panel: no grid or background, just black axis lines.
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.position = 'bottom'
  )
